{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-03-23T04:41:51.351008Z",
     "start_time": "2025-03-23T04:41:51.129095Z"
    }
   },
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.model_selection import cross_val_score"
   ],
   "outputs": [],
   "execution_count": 34
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### 1. 获取数据集",
   "id": "22cb16341f6dc547"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:05:56.562783Z",
     "start_time": "2025-03-23T04:05:31.129728Z"
    }
   },
   "cell_type": "code",
   "source": "data = pd.read_csv('./FBlocation/train.csv')",
   "id": "c86e1134d0f5d161",
   "outputs": [],
   "execution_count": 2
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:10:34.500209Z",
     "start_time": "2025-03-23T04:10:34.376849Z"
    }
   },
   "cell_type": "code",
   "source": "data.head()",
   "id": "fc19e0c5ca634caf",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "   row_id       x       y  accuracy    time    place_id\n",
       "0       0  0.7941  9.0809        54  470702  8523065625\n",
       "1       1  5.9567  4.7968        13  186555  1757726713\n",
       "2       2  8.3078  7.0407        74  322648  1137537235\n",
       "3       3  7.3665  2.5165        65  704587  6567393236\n",
       "4       4  4.0961  1.1307        31  472130  7440663949"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>row_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>accuracy</th>\n",
       "      <th>time</th>\n",
       "      <th>place_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0.7941</td>\n",
       "      <td>9.0809</td>\n",
       "      <td>54</td>\n",
       "      <td>470702</td>\n",
       "      <td>8523065625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>5.9567</td>\n",
       "      <td>4.7968</td>\n",
       "      <td>13</td>\n",
       "      <td>186555</td>\n",
       "      <td>1757726713</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>8.3078</td>\n",
       "      <td>7.0407</td>\n",
       "      <td>74</td>\n",
       "      <td>322648</td>\n",
       "      <td>1137537235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>7.3665</td>\n",
       "      <td>2.5165</td>\n",
       "      <td>65</td>\n",
       "      <td>704587</td>\n",
       "      <td>6567393236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>4.0961</td>\n",
       "      <td>1.1307</td>\n",
       "      <td>31</td>\n",
       "      <td>472130</td>\n",
       "      <td>7440663949</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 4
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "",
   "id": "f4756de9fd54355e"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### 2. 基本的数据处理",
   "id": "a582fd301600bd72"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:13:38.202128Z",
     "start_time": "2025-03-23T04:13:37.342888Z"
    }
   },
   "cell_type": "code",
   "source": [
    "#  1. 缩小数据范围\n",
    "data = data.query(\"x < 2.5 & x > 2 & y < 1.5 & y > 1\")"
   ],
   "id": "45ffac1d91c7164a",
   "outputs": [],
   "execution_count": 5
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:13:47.929823Z",
     "start_time": "2025-03-23T04:13:47.891891Z"
    }
   },
   "cell_type": "code",
   "source": "data.head()",
   "id": "b1ca91d49b348a97",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "      row_id       x       y  accuracy    time    place_id\n",
       "112      112  2.2360  1.3655        66  623174  7663031065\n",
       "180      180  2.2003  1.2541        65  610195  2358558474\n",
       "367      367  2.4108  1.3213        74  579667  6644108708\n",
       "874      874  2.0822  1.1973       320  143566  3229876087\n",
       "1022    1022  2.0160  1.1659        65  207993  3244363975"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>row_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>accuracy</th>\n",
       "      <th>time</th>\n",
       "      <th>place_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>112</th>\n",
       "      <td>112</td>\n",
       "      <td>2.2360</td>\n",
       "      <td>1.3655</td>\n",
       "      <td>66</td>\n",
       "      <td>623174</td>\n",
       "      <td>7663031065</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180</th>\n",
       "      <td>180</td>\n",
       "      <td>2.2003</td>\n",
       "      <td>1.2541</td>\n",
       "      <td>65</td>\n",
       "      <td>610195</td>\n",
       "      <td>2358558474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>367</th>\n",
       "      <td>367</td>\n",
       "      <td>2.4108</td>\n",
       "      <td>1.3213</td>\n",
       "      <td>74</td>\n",
       "      <td>579667</td>\n",
       "      <td>6644108708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>874</th>\n",
       "      <td>874</td>\n",
       "      <td>2.0822</td>\n",
       "      <td>1.1973</td>\n",
       "      <td>320</td>\n",
       "      <td>143566</td>\n",
       "      <td>3229876087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1022</th>\n",
       "      <td>1022</td>\n",
       "      <td>2.0160</td>\n",
       "      <td>1.1659</td>\n",
       "      <td>65</td>\n",
       "      <td>207993</td>\n",
       "      <td>3244363975</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 6
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:22:41.633861Z",
     "start_time": "2025-03-23T04:22:41.550502Z"
    }
   },
   "cell_type": "code",
   "source": [
    "#  2. 处理时间特征\n",
    "time_value = pd.to_datetime(data['time'], unit='s')\n",
    "date = pd.DatetimeIndex(time_value)\n",
    "#  .loc的用法：loc[行标签,列标签]，作用是修改数据框的某些行或列的值。\n",
    "data.loc[:, 'day'] = date.day\n",
    "data.loc[:, 'hour'] = date.hour\n",
    "data.loc[:,'weekday'] = date.weekday"
   ],
   "id": "6125df753d2b0f2e",
   "outputs": [],
   "execution_count": 12
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:23:42.802065Z",
     "start_time": "2025-03-23T04:23:42.762229Z"
    }
   },
   "cell_type": "code",
   "source": "data.head()",
   "id": "5a14120608dad00b",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "      row_id       x       y  accuracy    time    place_id  day  hour  weekday\n",
       "112      112  2.2360  1.3655        66  623174  7663031065    8     5        3\n",
       "180      180  2.2003  1.2541        65  610195  2358558474    8     1        3\n",
       "367      367  2.4108  1.3213        74  579667  6644108708    7    17        2\n",
       "874      874  2.0822  1.1973       320  143566  3229876087    2    15        4\n",
       "1022    1022  2.0160  1.1659        65  207993  3244363975    3     9        5"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>row_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>accuracy</th>\n",
       "      <th>time</th>\n",
       "      <th>place_id</th>\n",
       "      <th>day</th>\n",
       "      <th>hour</th>\n",
       "      <th>weekday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>112</th>\n",
       "      <td>112</td>\n",
       "      <td>2.2360</td>\n",
       "      <td>1.3655</td>\n",
       "      <td>66</td>\n",
       "      <td>623174</td>\n",
       "      <td>7663031065</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180</th>\n",
       "      <td>180</td>\n",
       "      <td>2.2003</td>\n",
       "      <td>1.2541</td>\n",
       "      <td>65</td>\n",
       "      <td>610195</td>\n",
       "      <td>2358558474</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>367</th>\n",
       "      <td>367</td>\n",
       "      <td>2.4108</td>\n",
       "      <td>1.3213</td>\n",
       "      <td>74</td>\n",
       "      <td>579667</td>\n",
       "      <td>6644108708</td>\n",
       "      <td>7</td>\n",
       "      <td>17</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>874</th>\n",
       "      <td>874</td>\n",
       "      <td>2.0822</td>\n",
       "      <td>1.1973</td>\n",
       "      <td>320</td>\n",
       "      <td>143566</td>\n",
       "      <td>3229876087</td>\n",
       "      <td>2</td>\n",
       "      <td>15</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1022</th>\n",
       "      <td>1022</td>\n",
       "      <td>2.0160</td>\n",
       "      <td>1.1659</td>\n",
       "      <td>65</td>\n",
       "      <td>207993</td>\n",
       "      <td>3244363975</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 13
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:32:06.387127Z",
     "start_time": "2025-03-23T04:32:06.316163Z"
    }
   },
   "cell_type": "code",
   "source": [
    "#  3. 过滤签到次数少的地点\n",
    "place_count = data.groupby('place_id').count()['row_id']\n",
    "place_count[place_count > 3].head()\n",
    "data_final = data[data['place_id'].isin(place_count[place_count > 3].index.values)]"
   ],
   "id": "c16e13b4d091a6ff",
   "outputs": [],
   "execution_count": 21
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### 3. 筛选特征值和目标值",
   "id": "33c4f3d86af1959d"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:35:12.891819Z",
     "start_time": "2025-03-23T04:35:12.876779Z"
    }
   },
   "cell_type": "code",
   "source": [
    "x = data_final[['x', 'y', 'accuracy', 'day', 'hour', 'weekday']]\n",
    "y = data_final['place_id']"
   ],
   "id": "7281a45ba1070485",
   "outputs": [],
   "execution_count": 26
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:35:25.231403Z",
     "start_time": "2025-03-23T04:35:25.220255Z"
    }
   },
   "cell_type": "code",
   "source": "y.head()",
   "id": "64e6ce446cdba196",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "112     7663031065\n",
       "367     6644108708\n",
       "874     3229876087\n",
       "1022    3244363975\n",
       "1045    6438240873\n",
       "Name: place_id, dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 27
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### 4. 划分训练集和测试集",
   "id": "5f37080ae89ec3b7"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:37:13.448633Z",
     "start_time": "2025-03-23T04:37:13.413656Z"
    }
   },
   "cell_type": "code",
   "source": "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)",
   "id": "2079fbdbe82848e7",
   "outputs": [],
   "execution_count": 30
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### 5. 特征工程:标准化",
   "id": "bd53a8e0e4e8c395"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T04:40:11.678980Z",
     "start_time": "2025-03-23T04:40:11.636917Z"
    }
   },
   "cell_type": "code",
   "source": [
    "transfer = StandardScaler()\n",
    "x_train = transfer.fit_transform(x_train)\n",
    "x_test = transfer.transform(x_test)"
   ],
   "id": "9b70e1b4d84a0131",
   "outputs": [],
   "execution_count": 32
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### 6. KNN算法预估器",
   "id": "9590039eb8326f94"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T05:47:29.241347Z",
     "start_time": "2025-03-23T05:46:46.991079Z"
    }
   },
   "cell_type": "code",
   "source": [
    "estimator = KNeighborsClassifier()\n",
    "# 加入网格搜索与交叉验证\n",
    "# 参数准备\n",
    "param_dict = {\"n_neighbors\": [3, 5, 7, 9]}\n",
    "estimator = GridSearchCV(estimator, param_dict, cv = 3)\n",
    "estimator.fit(x_train, y_train)"
   ],
   "id": "903dceea648b7e5c",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\python\\python安装程序\\Lib\\site-packages\\sklearn\\model_selection\\_split.py:805: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=3.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=3, estimator=KNeighborsClassifier(),\n",
       "             param_grid={'n_neighbors': [3, 5, 7, 9]})"
      ],
      "text/html": [
       "<style>#sk-container-id-3 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: #000;\n",
       "  --sklearn-color-text-muted: #666;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-3 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-3 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-3 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: flex;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "  align-items: start;\n",
       "  justify-content: space-between;\n",
       "  gap: 0.5em;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 label.sk-toggleable__label .caption {\n",
       "  font-size: 0.6rem;\n",
       "  font-weight: lighter;\n",
       "  color: var(--sklearn-color-text-muted);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-3 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-3 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-3 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-3 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-3 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 0.5em;\n",
       "  text-align: center;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-3 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=3, estimator=KNeighborsClassifier(),\n",
       "             param_grid={&#x27;n_neighbors&#x27;: [3, 5, 7, 9]})</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>GridSearchCV</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\"><pre>GridSearchCV(cv=3, estimator=KNeighborsClassifier(),\n",
       "             param_grid={&#x27;n_neighbors&#x27;: [3, 5, 7, 9]})</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>best_estimator_: KNeighborsClassifier</div></div></label><div class=\"sk-toggleable__content fitted\"><pre>KNeighborsClassifier(n_neighbors=7)</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>KNeighborsClassifier</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.neighbors.KNeighborsClassifier.html\">?<span>Documentation for KNeighborsClassifier</span></a></div></label><div class=\"sk-toggleable__content fitted\"><pre>KNeighborsClassifier(n_neighbors=7)</pre></div> </div></div></div></div></div></div></div></div></div>"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 42
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "### 7. 模型评估",
   "id": "d4a2e3d42c886038"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T05:48:21.233039Z",
     "start_time": "2025-03-23T05:48:14.825820Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 方法一：直接比对真实值与预测值\n",
    "y_pred = estimator.predict(x_test)\n",
    "print(\"y_pred:\\n\", y_pred)\n",
    "print(\"直接比对真实值与预测值:\\n\", y_test == y_pred)"
   ],
   "id": "d5102cf978a74549",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "y_pred:\n",
      " [5600661085 1031277804 4374582281 ... 8032468532 1380291167 2367979052]\n",
      "直接比对真实值与预测值:\n",
      " 1669707      True\n",
      "142559      False\n",
      "10856610    False\n",
      "28047066     True\n",
      "23520941     True\n",
      "            ...  \n",
      "16359747    False\n",
      "4039376      True\n",
      "2795110     False\n",
      "27714926    False\n",
      "28720896     True\n",
      "Name: place_id, Length: 16182, dtype: bool\n"
     ]
    }
   ],
   "execution_count": 43
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-03-23T05:48:33.914699Z",
     "start_time": "2025-03-23T05:48:26.750609Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 方法二：计算准确率\n",
    "score = estimator.score(x_test, y_test)\n",
    "print(\"准确率:\", score)\n",
    "# 最佳参数\n",
    "print(\"最佳参数:\", estimator.best_params_)\n",
    "# 最佳结果\n",
    "print(\"最佳结果:\", estimator.best_score_)\n",
    "# 最佳估计器\n",
    "print(\"最佳估计器:\", estimator.best_estimator_)\n",
    "# 交叉验证结果\n",
    "print(\"交叉验证结果:\", estimator.cv_results_)"
   ],
   "id": "117ae06f8772e30c",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准确率: 0.37325423309850453\n",
      "最佳参数: {'n_neighbors': 7}\n",
      "最佳结果: 0.3378445186009145\n",
      "最佳估计器: KNeighborsClassifier(n_neighbors=7)\n",
      "交叉验证结果: {'mean_fit_time': array([0.19288707, 0.12039812, 0.11038868, 0.11995745]), 'std_fit_time': array([0.11288401, 0.00842825, 0.00457244, 0.00559179]), 'mean_score_time': array([3.89440783, 3.09372711, 3.13653596, 3.27182349]), 'std_score_time': array([1.43449681, 0.03806616, 0.14092669, 0.02367779]), 'param_n_neighbors': masked_array(data=[3, 5, 7, 9],\n",
      "             mask=[False, False, False, False],\n",
      "       fill_value=999999), 'params': [{'n_neighbors': 3}, {'n_neighbors': 5}, {'n_neighbors': 7}, {'n_neighbors': 9}], 'split0_test_score': array([0.32647386, 0.34060994, 0.33861698, 0.33773637]), 'split1_test_score': array([0.32109752, 0.33384316, 0.33657768, 0.33412125]), 'split2_test_score': array([0.32295143, 0.33671672, 0.3383389 , 0.3350482 ]), 'mean_test_score': array([0.3235076 , 0.33705661, 0.33784452, 0.33563527]), 'std_test_score': array([0.00222984, 0.00277296, 0.00090296, 0.00153314]), 'rank_test_score': array([4, 2, 1, 3], dtype=int32)}\n"
     ]
    }
   ],
   "execution_count": 44
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "",
   "id": "74abd5b927348b37"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
